{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Markdown\n",
    "from llama_hub.openalex import OpenAlexReader\n",
    "from llama_index import (\n",
    "    ServiceContext,\n",
    "    VectorStoreIndex,\n",
    ")\n",
    "from llama_index.llms import OpenAI\n",
    "from llama_index.query_engine import CitationQueryEngine\n",
    "from llama_index.response.notebook_utils import display_response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the address below with your own email before running.\n",
    "openalex_reader = OpenAlexReader(\n",
    "    email=\"shauryr@gmail.com\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search phrase passed to openalex_reader.load_data in the next cell.\n",
    "query = \"biases in large language models\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the OpenAlex works matching the search phrase (full_text=False).\n",
    "works = openalex_reader.load_data(query, full_text=False)\n",
    "\n",
    "# Build a vector index over the works, using gpt-3.5-turbo at temperature 0.\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
    "service_context = ServiceContext.from_defaults(llm=llm)\n",
    "index = VectorStoreIndex.from_documents(works, service_context=service_context)\n",
    "\n",
    "# Wrap the index in a citation query engine.\n",
    "query_engine = CitationQueryEngine.from_args(\n",
    "    index,\n",
    "    similarity_top_k=10,\n",
    "    citation_chunk_size=1024,\n",
    ")\n",
    "\n",
    "# Ask the question; the prompt requests a markdown table as the answer.\n",
    "response = query_engine.query(\n",
    "    \"list the biases in large language models in a markdown table\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "| Source | Biases |\n",
       "|--------|--------|\n",
       "| Source 1 | Data selection bias, social bias (gender, age, sexual orientation, ethnicity, religion, culture) |\n",
       "| Source 2 | Biases of what is right and wrong to do, reflecting ethical and moral norms of society |\n",
       "| Source 3 | Anti-Muslim bias |\n",
       "| Source 6 | Gender bias |\n",
       "| Source 9 | Anti-LGBTQ+ bias |\n",
       "| Source 10 | Potential bias in the output |"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Render the response text as Markdown so the table displays as a table.\n",
    "# Markdown is imported in the imports cell at the top of the notebook.\n",
    "Markdown(response.response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**`Final Response:`** | Source | Biases |\n",
       "|--------|--------|\n",
       "| Source 1 | Data selection bias, social bias (gender, age, sexual orientation, ethnicity, religion, culture) |\n",
       "| Source 2 | Biases of what is right and wrong to do, reflecting ethical and moral norms of society |\n",
       "| Source 3 | Anti-Muslim bias |\n",
       "| Source 6 | Gender bias |\n",
       "| Source 9 | Anti-LGBTQ+ bias |\n",
       "| Source 10 | Potential bias in the output |"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 1/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 9d42ce3b-b1d1-401b-982b-fb4ec3de7d11<br>**Similarity:** 0.8639981601632382<br>**Text:** Source 1:\n",
       "Biases in Large Language Models: Origins, Inventory, and Discussion In this article, we...<br>**Metadata:** {'title': 'Biases in Large Language Models: Origins, Inventory, and Discussion', 'keywords': None, 'primary_location': 'Journal of Data and Information Quality', 'publication_year': 2023, 'authorships': ['Roberto Navigli', 'Simone Conia', 'Björn Roß']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 2/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 518b9288-1cc7-42ca-81af-c8598bd8f526<br>**Similarity:** 0.8397403491959685<br>**Text:** Source 2:\n",
       "Large pre-trained language models contain human-like biases of what is right and wrong ...<br>**Metadata:** {'title': 'Large pre-trained language models contain human-like biases of what is right and wrong to do', 'keywords': None, 'primary_location': 'Nature Machine Intelligence', 'publication_year': 2022, 'authorships': ['Patrick Schramowski', 'Cigdem Turan', 'Nico Andersen', 'Constantin A. Rothkopf', 'Kristian Kersting']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 3/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 71bd1dc3-7794-4ddc-9590-a88a8a402671<br>**Similarity:** 0.8364776873374511<br>**Text:** Source 3:\n",
       "Persistent Anti-Muslim Bias in Large Language Models It has been observed that large-sc...<br>**Metadata:** {'title': 'Persistent Anti-Muslim Bias in Large Language Models', 'keywords': None, 'primary_location': 'arXiv (Cornell University)', 'publication_year': 2021, 'authorships': ['Abubakar Abid', 'Maheen Farooqi', 'James Zou']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 4/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 34b62bb9-4c1c-4d6d-8cf5-db95f402b1ea<br>**Similarity:** 0.8364375114002981<br>**Text:** Source 4:\n",
       "Should ChatGPT be Biased? Challenges and Risks of Bias in Large Language\n",
       "  Models As th...<br>**Metadata:** {'title': 'Should ChatGPT be Biased? Challenges and Risks of Bias in Large Language\\n  Models', 'keywords': None, 'primary_location': 'arXiv (Cornell University)', 'publication_year': 2023, 'authorships': ['Emilio Ferrara']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 5/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 131de912-efb6-4077-9572-1c1c4a39d06a<br>**Similarity:** 0.834294145607976<br>**Text:** Source 5:\n",
       "Persistent Anti-Muslim Bias in Large Language Models It has been observed that large-sc...<br>**Metadata:** {'title': 'Persistent Anti-Muslim Bias in Large Language Models', 'keywords': None, 'primary_location': None, 'publication_year': 2021, 'authorships': ['Abubakar Abid', 'Maheen Farooqi', 'James Zou']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 6/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** e503fd0d-5ee6-429b-9942-9c0a651689fd<br>**Similarity:** 0.8167979370445425<br>**Text:** Source 6:\n",
       "Reducing Gender Bias in Abusive Language Detection Abusive language detection models te...<br>**Metadata:** {'title': 'Reducing Gender Bias in Abusive Language Detection', 'keywords': None, 'primary_location': None, 'publication_year': 2018, 'authorships': ['Ji Ho Park', 'Jamin Shin', 'Pascale Fung']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 7/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 6b6e3857-2aae-4c5c-acc6-6b1fb244e8fa<br>**Similarity:** 0.7986569232879219<br>**Text:** Source 7:\n",
       "Fighting reviewer fatigue or amplifying bias? Considerations and recommendations for us...<br>**Metadata:** {'title': 'Fighting reviewer fatigue or amplifying bias? Considerations and recommendations for use of ChatGPT and other Large Language Models in scholarly peer review', 'keywords': None, 'primary_location': 'Research Square (Research Square)', 'publication_year': 2023, 'authorships': ['Mohammad Hosseini', 'Serge P. J. M. Horbach']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 8/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** aec8a3ff-06a0-49f4-8710-ce61b41bfedb<br>**Similarity:** 0.7967739961805623<br>**Text:** Source 8:\n",
       "Fighting reviewer fatigue or amplifying bias? Considerations and recommendations for us...<br>**Metadata:** {'title': 'Fighting reviewer fatigue or amplifying bias? Considerations and recommendations for use of ChatGPT and other large language models in scholarly peer review', 'keywords': None, 'primary_location': 'Research Integrity and Peer Review', 'publication_year': 2023, 'authorships': ['Mohammad Hosseini', 'Serge P. J. M. Horbach']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 9/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** 0c540aa8-8858-49e0-bd10-4e7e8dc22567<br>**Similarity:** 0.7877998674549409<br>**Text:** Source 9:\n",
       "WinoQueer: A Community-in-the-Loop Benchmark for Anti-LGBTQ+ Bias in Large Language Mod...<br>**Metadata:** {'title': 'WinoQueer: A Community-in-the-Loop Benchmark for Anti-LGBTQ+ Bias in Large Language Models', 'keywords': None, 'primary_location': None, 'publication_year': 2023, 'authorships': ['Virginia K. Felkner', 'Ho-Chun Herbert Chang', 'Eugene Jang', 'Jonathan May']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "---"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**`Source Node 10/10`**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "**Node ID:** e1e96b1f-e675-4448-9884-576c222dbe32<br>**Similarity:** 0.7804667151678476<br>**Text:** Source 10:\n",
       "ChatGPT for good? On opportunities and challenges of large language models for educati...<br>**Metadata:** {'title': 'ChatGPT for good? On opportunities and challenges of large language models for education', 'keywords': None, 'primary_location': 'Learning and Individual Differences', 'publication_year': 2023, 'authorships': ['Enkelejda Kasneci', 'Kathrin Sessler', 'Stefan Küchemann', 'Maria Bannert', 'Daryna Dementieva', 'Frank Fischer', 'Urs Gasser', 'Georg Groh', 'Stephan Günnemann', 'Eyke Hüllermeier', 'Stepha Krusche', 'Gitta Kutyniok', 'Tilman Michaeli', 'Claudia Nerdel', 'Jürgen Pfeffer', 'Oleksandra Poquet', 'Michael Sailer', 'Albrecht Schmidt', 'Tina Seidel', 'Matthias Stadler', 'J. Weller', 'Jochen Kühn', 'Gjergji Kasneci']}<br>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the final response together with its source nodes and their metadata.\n",
    "display_response(\n",
    "    response,\n",
    "    show_source=True,\n",
    "    source_length=100,\n",
    "    show_source_metadata=True,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
